library(readstata13)
library(data.table)
library(beepr)
library(ggplot2)
library(lubridate)
library(rvest)
library(dplyr)
library(stargazer)
library(starpolishr)
library(lfe)
library(MASS)
library(foreign)
# Clear the workspace and switch to the replication-package folder.
# NOTE(review): the path below is specific to one machine; it has to be
# adjusted before the script can run anywhere else.
rm(list = ls())
setwd("C:/Users/k2258581/OneDrive - King's College London/Drive/Research/Partisan Motivated Reasoning/Replication package")

# Read the Stata file and store it as an .rds file.
saveRDS(read.dta13("gss7221_r3a.dta"), "GSS_allyears_new.rds")

# Clear the workspace again, then load the stored copy as a data.table.
rm(list = ls())
fData <- as.data.table(readRDS("GSS_allyears_new.rds"))

# Interview month and imputed interview date ----
# dateintv is matched against whole-number codes: 101-199 is labelled
# "January", 201-299 "February", ..., 1201-1299 "December". Any other value
# (including NA) leaves Interview_month as NA.
# NOTE(review): neither Interview_month nor Date_imputed is listed in vCols
# further down, so both are dropped before the CSV is written -- confirm that
# this is intended.
vDateCode <- rep(100L * seq_along(month.name), each = 99L) + rep(1:99, times = 12L)
vDateMonth <- rep(month.name, each = 99L)
fData[, Interview_month := vDateMonth[match(dateintv, vDateCode)]]
rm(vDateCode, vDateMonth)

# Impute the interview date as the first day of the interview month.
fData[, Date_imputed := dmy(paste0(
  "01-", match(Interview_month, month.name), "-", year
))]

# Incumbent president and party by survey year ----
# One row per presidency; First and Last are the survey years attributed to
# that president (inclusive).
dtTerm <- data.table(
  President = c("Nixon", "Ford", "Carter", "Reagan", "Bush sr.",
                "Clinton", "Bush jr.", "Obama", "Trump", "Biden"),
  Party = c("Republican", "Republican", "Democrat", "Republican", "Republican",
            "Democrat", "Republican", "Democrat", "Republican", "Democrat"),
  First = c(1969L, 1975L, 1977L, 1981L, 1989L,
            1993L, 2001L, 2009L, 2017L, 2021L),
  Last = c(1974L, 1976L, 1980L, 1988L, 1992L,
           2000L, 2008L, 2016L, 2020L, 2024L)
)

# Expand the table to one row per year and look every survey year up once, so
# that party and president always come from the same row. A year that is
# missing or outside the table yields NA for both columns instead of silently
# falling through to "Democrat".
dtTermYear <- dtTerm[, .(Year = seq(First, Last)), by = .(President, Party)]
vTermRow <- match(fData$year, dtTermYear$Year)
fData[, Incumbent_party := dtTermYear$Party[vTermRow]]
fData[, Incumbent_president := dtTermYear$President[vTermRow]]
rm(dtTerm, dtTermYear, vTermRow)

# Party identification ----
# Collapse partyid into three groups. Respondents who lean towards a party are
# grouped with that party; any value not listed below (including NA) gives NA.
vPartyLabel <- c(
  "independent, close to democrat",
  "not very strong democrat",
  "strong democrat",
  "independent, close to republican",
  "not very strong republican",
  "strong republican",
  "independent (neither, no response)"
)
# First three labels -> Democrat, next three -> Republican, last -> Independent.
vPartyGroup <- rep(c("Democrat", "Republican", "Independent"), times = c(3L, 3L, 1L))
fData[, Party_cat := vPartyGroup[match(partyid, vPartyLabel)]]
rm(vPartyLabel, vPartyGroup)


# Date_imputed (the first day of the interview month) has already been derived
# above from Interview_month and year, and neither input has changed since, so
# it is not recomputed here.

# For 2021, set every interview date to 1 February 2021 (the exact date does
# not matter).
fData[year == 2021, Date_imputed := dmy("01-02-2021")]

# Columns carried forward ----
# Only the columns named here survive this step, in this order.
# NOTE(review): Interview_month and Date_imputed are not in the list, so they
# are dropped here -- confirm that this is intended.
vCols <- c(
  "id", "attend", "coden", "popespks", "postlifenv", "postlifev", "pray",
  "relitenv", "relitennv", "polint", "eqwlth", "courts", "grass", "happy",
  "suicide1", "suicide2", "suicide3", "suicide4",
  "abdefect", "abnomore", "abhlth", "abpoor", "abrape", "absingle", "abany",
  "tvhours", "contv", "newsfrom", "news", "conpress",
  "unemp", "year", "wrkstat", "age", "degree", "sex", "race", "region",
  "partyid", "polviews", "relig", "relig16",
  "confinan", "conbus", "conclerg", "coneduc", "confed", "conlabor",
  "conmedic", "conjudge", "consci", "conlegis", "conarmy",
  "realinc", "coninc", "conrinc", "wtssall", "partyid2", "partyid3",
  "Party_cat", "Incumbent_party", "Incumbent_president"
)
fData <- fData[, vCols, with = FALSE]

# Recode covariates as logical indicators: unemp is overwritten with
# (unemp == "yes"), and female is added as (sex == "female").
fData[, unemp := unemp == "yes"]
fData[, female := sex == "female"]


# Drop the columns that are not used in the analysis ----
vRemove <- c(
  "attend", "coden", "popespks", "postlifenv", "postlifev", "pray",
  "relitenv", "relitennv", "polint", "eqwlth", "courts", "grass", "happy",
  "suicide1", "suicide2", "suicide3", "suicide4",
  "abdefect", "abnomore", "abhlth", "abpoor", "abrape", "absingle", "abany",
  "tvhours", "contv", "newsfrom", "news", "conpress", "relig16",
  "confinan", "conbus", "conclerg", "coneduc", "conlabor", "conmedic",
  "conjudge", "consci", "conarmy", "coninc", "conrinc"
)
# Negated selection: keep every column except those named in vRemove.
fData <- fData[, !vRemove, with = FALSE]

# Write the cleaned data set to disk.
fwrite(fData, "GSS_allyears_clean.csv")

